# Description ------------------------------------------------------------------

### This performs the analysis shown in the body of the main paper, saving 
### pre-final-formatting .tex files for the tables in the main paper

### NOTE: Some of the .tex tables were slightly modified after being produced
### via this code for aesthetic purposes (e.g., adding in the control group 
### means as the first row, making the notes section at the end read a bit
### easier, etc.).

# Setup ------------------------------------------------------------------------
out_path <- 'output/main/'

## Packages --------------------------------------------------------------------
#if packages are not installed, they must be installed with:
# install.packages("package_name"); The name of package must be in quotes
library(dplyr)
library(readr)
library(tidyr)
library(purrr)
library(tibble)
library(stringr)
library(car)
library(xtable)
library(stargazer)
library(multiwayvcov)
library(haven)
library(labelled)


## Functions -------------------------------------------------------------------
source("scripts/0_functions/functions_analysis.R")

## Data ------------------------------------------------------------------------
raw_path <- 'data/1_raw/'
clean_path <- 'data/2_clean/'
formatted_path <- 'data/3_formatted/'

### CLEAN ----

#endline vendor survey data
load(paste0(clean_path, "vendor_end.Rdata"))

#baseline vendor survey data
load(paste0(clean_path, "vendor_base.Rdata"))

#tax collector endline survey data
load(paste0(clean_path, "tax_collector_end.RData"))

#tax collector baseline survey data
load(paste0(clean_path, "tax_collector_base.RData"))

### FORMATTED ----

#diff in market level tax measures from both baseline and endline incl
load(paste0(formatted_path, "market_lvl_diffs.Rdata"))

# Analysis ---------------------------------------------------------------------

## Table 2 ---------------------------------------------------------------------

### Panel A --------------------------------------------------------------------

#### Fit Models ----------------------------------------------------------------

## Recent Receipt Individual DIM
h1_o3_r7 <- lm(data = vendor_end, recent_receipt_7 ~ BU + TD + Both + 
                         block_id + as.factor(enum))

## Recent Receipt Market DID
h1_o3_r7_mkt_did_diff <- lm(data = market_lvl_diffs, 
                            recent_receipt_7 ~ BU + TD + Both + block_id)


#### Control Group Means -------------------------------------------------------

### Indvidual DIM
get_control_means(vendor_end,
                  treatment_status,
                  recent_receipt_7,
                  control_val = "Control")
### Market DID
get_control_means(market_lvl_diffs,
                  treatment_status,
                  recent_receipt_7,
                  control_val = "Control")



#### Tex Table -----------------------------------------------------------------

# add control group means
# "Individual-level model includes enumerator and block fixed-effects",
# "Individual-level model has SEs clustered on market.",
# "Market-level model includes block fixed-effects."
stargazer(h1_o3_r7,
          h1_o3_r7_mkt_did_diff,
          se = list(calc.ses.cluster(h1_o3_r7, "market", 
                                     data = vendor_end),
                    NULL),
          keep = c("BU", "TD", "Both"),
          title = "Hypothesis 1 Results Table - Individual-Level DIM and Market-Level DID",
          label = "tab:main",
          model.numbers = F,
          column.labels = c("Individual DIM", "Market DID"),
          keep.stat = c("n", "adj.rsq"),
          dep.var.labels = c("\\makecell{Evidence of Receipt\\\\from Past 7 Days}"),
          dep.var.caption = "",
          star.cutoffs = c(.05, .01, .001),
          header = F,
          out = paste0(out_path, "table2_panelA.tex"))

### Panel B --------------------------------------------------------------------

## Comparison p-values
lin_hyps <- c("BU = TD", "BU = Both", "TD = Both",
              "BU + TD = Both")

main_mods_names <- c("\\makecell{Receipt Outcome\\\\(Individual DIM)}",
                     "\\makecell{Receipt Outcome\\\\(Market DID)}")

#### Tex Table -----------------------------------------------------------------

# All together in one function
get_linhyps_ps_mods(
  list(r7_ind = h1_o3_r7,
       r7_did = h1_o3_r7_mkt_did_diff),
  lin_hyps = lin_hyps,
  cluster = list(vendor_end$market, NULL),
  correct = FALSE
) |> 
  print_linhyps_table(
    mod_names = main_mods_names,
    adj_only = FALSE,
    caption = "$p$-Values for Linear Hypothesis (Wald) Tests for Receipt Outcome Models in Table 2",
    label = "app:tab:lin_hyps:main",
    caption.placement = "top",
    include.rownames = FALSE,
    sanitize.text.function = sanitize_passthrough,
    size = "scriptsize",
    file = paste0(out_path, "table2_panelB.tex")
  )

## Table 3 ---------------------------------------------------------------------

### Fit Models ----------------------------------------------------------------
h4_dg <- lm(data = vendor_end,
                    as.numeric(tr1_clean) ~ BU + TD + Both + block_id + as.factor(enum))
h4_wc <- lm(data = vendor_end,
                    as.numeric(tr2_clean) ~ BU + TD + Both + block_id + 
                      as.factor(enum))
h5_tr9e <- lm(data = vendor_end,
                      as.numeric(tr9e_clean) ~ BU + TD + Both + block_id + 
                        as.factor(enum))
h5_tr9g <- lm(data = vendor_end,
                      as.numeric(tr9g_clean) ~ BU + TD + Both + block_id + 
                        as.factor(enum))
h5_tr9h <- lm(data = vendor_end,
                      as.numeric(tr9h_clean) ~ BU + TD + Both + block_id + 
                        as.factor(enum))

### Control Group Means --------------------------------------------------------
get_control_means(vendor_end,
                  treatment_status,
                  tr1_num,
                  tr2_num,
                  tr9e_num,
                  tr9g_num,
                  tr9h_num,
                  control_val = "Control")

### Tex Table ------------------------------------------------------------------
stargazer(h4_dg,
          h4_wc,
          h5_tr9e,
          h5_tr9g,
          h5_tr9h,
          se = list(calc.ses.cluster(h4_dg, "market", 
                                     data = vendor_end),
                    calc.ses.cluster(h4_wc, "market", 
                                     data = vendor_end),
                    calc.ses.cluster(h5_tr9e, "market", 
                                     data = vendor_end),
                    calc.ses.cluster(h5_tr9g, "market", 
                                     data = vendor_end),
                    calc.ses.cluster(h5_tr9h, "market", 
                                     data = vendor_end)),
          keep = c("BU", "TD", "Both"),
          title = "Bottom-Up Causal Mechanism Outcomes: H4 - H5",
          model.numbers = F,
          notes = c("Individual-level models include enumerator", 
                    "and block fixed-effects.",
                    "Individual-level models have SEs clustered",
                    "on market.",
                    "All outcomes are on a 4-point scale."),
          keep.stat = c("n", "adj.rsq"),
          notes.label = "Notes:",
          dep.var.labels = c("Trust Local Gov.", "Trust in Ward Cllr.",
                             "DC Manages Funds Well",
                             "DC Transp. Using Funds",
                             "DC Transp. wrt. Collecting"), 
          font.size = "footnotesize",
          star.cutoffs = c(.05, .01, .001),
          header = F,
          model.names = T,
          out = paste0(out_path, "table3.tex"))

## Table 4 ---------------------------------------------------------------------

### Fit Models -----------------------------------------------------------------
h6_ms10 <- lm(data = vendor_end,
                      as.numeric(ms10_clean) ~ BU + TD + Both + block_id + 
                        as.factor(enum))
h6_ms1 <- lm(data = vendor_end,
                     as.numeric(ms1_clean) ~ BU + TD + Both + block_id + 
                       as.factor(enum))
h6_tc2_10 <- lm(data = vendor_end,
                        tc2_10_clean ~ BU + TD + Both + block_id + 
                          as.factor(enum))
h7_taxduty <- lm(data = vendor_end,
                         as.numeric(tc2_4b_clean) ~ BU + TD + Both + block_id + 
                           as.factor(enum))
h7_tax_even_disagree <- lm(data = vendor_end,
                                   as.numeric(pay_even_disagree) ~ BU + TD + Both + block_id + 
                                     as.factor(enum))

### Control Group Means --------------------------------------------------------
get_control_means(vendor_end,
                  treatment_status,
                  ms10_num,
                  ms1_num,
                  tc2_10_clean,
                  tc2_4b_num,
                  pay_even_disagree,
                  control_val = "Control")

### Tex Table ------------------------------------------------------------------
stargazer(h6_ms10,
          h6_ms1,
          h6_tc2_10,
          h7_taxduty,
          h7_tax_even_disagree, 
          se = list(calc.ses.cluster(h6_ms10, "market", 
                                     data = vendor_end),
                    calc.ses.cluster(h6_ms1, "market",
                                     data = vendor_end),
                    calc.ses.cluster(h6_tc2_10, "market", 
                                     data = vendor_end),
                    calc.ses.cluster(h7_taxduty, "market", 
                                     data = vendor_end),
                    calc.ses.cluster(h7_tax_even_disagree, "market", 
                                     data = vendor_end)),
          keep = c("BU", "TD", "Both"),
          title = "Bottom-Up Causal Mechanism Outcomes: H6 - H7",
          model.numbers = F,
          notes = c("Individual-level models include enumerator", 
                    "and block fixed-effects.",
                    "Individual-level models have SEs clustered",
                    "on market.",
                    "Outcomes 1 and 3 are on 4-point scale. Outcome 4 is dichotomous.",
                    "Outcome 2 is a number out of 1000."),
          keep.stat = c("n", "adj.rsq"),
          notes.label = "Notes:",
          dep.var.labels = c("Services Satisfaction", 
                             "Satisfaction with Water Access",
                             "Percep. of Sp. on Services",
                             "Paying Tax as Duty",
                             "Pay Tax Even if Disag. w. Gov."), 
          font.size = "footnotesize",
          star.cutoffs = c(.05, .01, .001),
          header = F,
          model.names = T,
          out = paste0(out_path, "table4.tex"))

## Table 5 ---------------------------------------------------------------------


### Fit Models -----------------------------------------------------------------
h8_election3 <- lm(data = vendor_end,
                            vote_intend ~ BU + TD + Both + block_id + 
                              as.factor(enum))
h8_p1_el <- lm(data = vendor_end, petition ~ BU + TD + Both + 
                      block_id + as.factor(enum))
h8_p2_el <- lm(data = vendor_end, petition_wname ~ BU + TD + Both + 
                      block_id + as.factor(enum))
h8_st1_el <- lm(data = vendor_end, stmt1_agree_sent ~ BU + TD + Both + 
                       block_id + as.factor(enum))
h8_st2_el <- lm(data = vendor_end, stmt2_agree_sent ~ BU + TD + Both + 
                       block_id + as.factor(enum))

### Control Group Means --------------------------------------------------------
get_control_means(vendor_end,
                  treatment_status,
                  vote_intend,
                  petition,
                  petition_name,
                  stmt1_agree_sent,
                  stmt2_agree_sent,
                  control_val = "Control")

### Tex Table ------------------------------------------------------------------
stargazer(h8_election3,
          h8_p1_el,
          h8_p2_el,
          h8_st1_el,
          h8_st2_el,
          se = list(calc.ses.cluster(h8_election3, "market", 
                                     data = vendor_end),
                    calc.ses.cluster(h8_p1_el, "market", 
                                     data = vendor_end),
                    calc.ses.cluster(h8_p2_el, "market", 
                                     data = vendor_end),
                    calc.ses.cluster(h8_st1_el, "market", 
                                     data = vendor_end),
                    calc.ses.cluster(h8_st2_el, "market",
                                     data = vendor_end)),
          keep = c("BU", "TD", "Both"),
          title = "Political Engagement Outcomes",
          model.numbers = F,
          notes = c("Individual-level models include enumerator", 
                    "and block fixed-effects.",
                    "Individual-level models have SEs clustered",
                    "on market.",
                    "All models linear probability models."),
          keep.stat = c("n", "adj.rsq"),
          notes.label = "Notes:",
          dep.var.labels = c("Vote", "Petition Anon.",
                             "Petition w. Name",
                             "Agree St. 1",
                             "Agree St. 2"),
          label = "tab:pol_eng",
          star.cutoffs = c(.05, .01, .001),
          header = F,
          out = paste0(out_path, "table5.tex"))

## Table 6 ---------------------------------------------------------------------

### Fit Models -----------------------------------------------------------------
h9_tc5a <- lm(data = vendor_end,
                      as.numeric(tc5a_clean) ~ BU + TD + Both + block_id + 
                        as.factor(enum))
h9_tc5b <- lm(data = vendor_end,
                      as.numeric(tc5b_clean) ~ BU + TD + Both + block_id + 
                        as.factor(enum))
h9_tc2_15b <- lm(data = vendor_end,
                         as.numeric(tc2_15b_clean) ~ BU + TD + Both + block_id + 
                           as.factor(enum))
h10_tc9 <- lm(data = vendor_end,
                     tc9_clean ~ BU + TD + Both + block_id + 
                       as.factor(enum))

### Control Group Means --------------------------------------------------------
get_control_means(vendor_end,
                  treatment_status,
                  tc5a_num,
                  tc5b_num,
                  tc2_15b_num,
                  tc9_clean,
                  control_val = "Control")

### Tex Table ------------------------------------------------------------------
stargazer(h9_tc5a,
          h9_tc5b,
          h9_tc2_15b,
          h10_tc9,
          se = list(calc.ses.cluster(h9_tc5a, "market", 
                                     data = vendor_end),
                    calc.ses.cluster(h9_tc5b, "market", 
                                     data = vendor_end),
                    calc.ses.cluster(h9_tc2_15b, "market", 
                                     data = vendor_end),
                    calc.ses.cluster(h10_tc9, "market", 
                                     data = vendor_end)),
          keep = c("BU", "TD", "Both"),
          title = "Top-Down Causal Mechanisms Outcomes, Vendor Survey",
          model.numbers = F,
          notes = c("Individual-level models include enumerator", 
                    "and block fixed-effects.",
                    "Individual-level models have SEs clustered",
                    "on market.",
                    "Models 1, 2, and 3 are on a 4-point scale.",
                    "Model 4 is a real number out of 1000."),
          keep.stat = c("n", "adj.rsq"),
          notes.label = "Notes:",
          dep.var.labels = c("Could Refuse to Pay", "Group Non-Comp. Poss.",
                             "Pay Because Consequences",
                             "Money Flowing to Government"),
          star.cutoffs = c(.05, .01, .001),
          header = F,
          out = paste0(out_path, "table6.tex"))

## Table 7 ---------------------------------------------------------------------

### Fit Models -----------------------------------------------------------------
h11_hrs_in_mkt <- lm(data = tax_collector_end,
                             hrs_in_mkt ~ BU + TD + Both + block_id + 
                               as.factor(enum))
h11_vendors_visited <- lm(data = tax_collector_end,
                                  vendors_visited_trim_99 ~ BU + TD + Both + block_id + 
                                    as.factor(enum))

### Control Group Means --------------------------------------------------------
get_control_means(tax_collector_end,
                  treatment_status,
                  hrs_in_mkt,
                  vendors_visited,
                  control_val = "Control")

### Tex Table ------------------------------------------------------------------
stargazer(h11_hrs_in_mkt,
          h11_vendors_visited,
          se = list(calc.ses.cluster(h11_hrs_in_mkt, "market", 
                                     data = tax_collector_end),
                    calc.ses.cluster(h11_vendors_visited, "market", 
                                     data = tax_collector_end)),
          keep = c("BU", "TD", "Both"),
          title = "Top-Down Causal Mechanisms Outcomes, Tax Collector Survey",
          model.numbers = F,
          notes = c("Individual-level models include enumerator", 
                    "and block fixed-effects.",
                    "Individual-level models have SEs clustered",
                    "on market."),
          keep.stat = c("n", "adj.rsq"),
          notes.label = "Notes:",
          dep.var.labels = c("Hours Working in Market A Day",
                             "Vendors Visited Per Day"),
          star.cutoffs = c(.05, .01, .001),
          header = F,
          out = paste0(out_path, "table7.tex")
          )

# Save Model Objects -----------------------------------------------------------
mod_objects <- ls()
mod_objects <- mod_objects[str_detect(mod_objects, "^h[1-9]")]
save(list = mod_objects,
     file = paste0(out_path, "main_analysis_models.RData"))
